import requests
import numpy as np
import seaborn as sns
import json
import matplotlib.pyplot as plt
import pandas as pd
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.les_mis import data
hv.extension('bokeh')
hv.output(size=200)
I formulate the query to the GraphQL API implemented by the subgraph TetherUSDT: in detail, I want to retrieve the following features with regard to issues, transfers and approvals.
query = """query {
issues(orderBy: amount, orderDirection: asc) {
id
amount
}
transfers {
id
from
to
value
}
approvals {
id
owner
spender
value
}
}"""
I use a temporary query url instead of the real query URL with the generated API key (https://gateway.testnet.thegraph.com/api/e0e91a841b861b1b22da614b4d6ef4b7/subgraphs/id/EW29ZpxgJRNUzjW5qs1KRyijWh2t171WmgNJ8sNmN7P5), since the latter method requires the adding of some GRT to my billing balance.
url = "https://api.studio.thegraph.com/query/16935/tetherusdt/v0.0.3"
r = requests.post(url, json={'query': query})
print(r.status_code)
#print(r.text)
200
json_data = json.loads(r.text)
df_issues = pd.DataFrame(json_data["data"]["issues"])
df_issues['amount'] = pd.to_numeric(df_issues['amount'])
The first 5 records of issues DataFrame.
df_issues.head()
| id | amount | |
|---|---|---|
| 0 | 0x2ec30181b26f842558280b682a5a58e15cae6498b4c1... | 8005642000 |
| 1 | 0x8cfc4f5f4729423f59dd1d263ead2f824b3f133b02b9... | 10000000000 |
| 2 | 0x27b0df3879a34fff8abd827eb0a05892041af7763a88... | 10000000000000 |
| 3 | 0x31f01e3f69d763c70e9965c370475f454338effdcca4... | 15000000000000 |
| 4 | 0x79595df1a5d8b96c017cd78bbd844fb68c94c19edb55... | 15000000000000 |
values = list(df_issues['amount'].value_counts().sort_index().keys())
frequencies = df_issues['amount'].value_counts().sort_index()
plt.figure(figsize=(35,10))
ax = sns.barplot(x=values, y=frequencies)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 30)
xlabels = [j//1000000000 for j in values]
ax.set_xticklabels(xlabels)
plt.title("Bar plot of the frequencies of issues amounts (in billion)")
sns.set(font_scale = 6)
plt.show()
df_approvals = pd.DataFrame(json_data["data"]["approvals"])
The value feature assumes incredibly high number.
df_approvals['value'][0]
'115792089237316195423570985008687907853269984665640564039457584007913129639935'
In order to plot these values, I try to manage their magnitude through standardization (min-max scaling), however I discover that the best approach is approximate each amount with the number of its digits.
values_length = [len(i) for i in df_approvals['value']]
plt.figure(figsize=(30,6))
sns.histplot(values_length, discrete=True)
plt.title("Frequency distribution of values' length")
sns.set(font_scale = 0.5)
plt.show()
df_approvals['value'] = values_length
graph = hv.Sankey(df_approvals.iloc[:, 1:])
graph.opts(
opts.Sankey(label_position='left', width=600, height=1100, cmap='Set1',
edge_color=dim('spender').str(), node_color=dim('spender').str()))
df_transfers = pd.DataFrame(json_data["data"]["transfers"])
df_transfers['value'] = pd.to_numeric(df_transfers['value'])
The first 5 records of transfers DataFrame.
df_transfers.head()
| id | from | to | value | |
|---|---|---|---|---|
| 0 | 0x00000112ed5c1d8741bb77c2b0920758ea29bc1c567a... | 0xcf3618d4680817af786a1d93465a19ab4225e69e | 0xd331227a7fe6682a93e8fa07700779f11996dd3a | 527000000 |
| 1 | 0x0000018b18fd871bd95081b75f2c1b18d9029c70026d... | 0xa4cbedac57819876f8fb0f69bb6cc3d7a61c4368 | 0xe1f067117527457ddca0209b0bbb930268d1be16 | 130000000 |
| 2 | 0x000002b2cff6dc4e25c875127befcbd091cd7ccceed4... | 0x99e462e3de92728b2a57a7b4b83a58dba98de71a | 0x32cd0b721fa6999af38d674220eb5daabf383e44 | 495000000 |
| 3 | 0x000002c8f9106b692c0d06fc042d60fa124bd0731a82... | 0xe0c1582a5cd193172624658ed0abeecea24835ad | 0x3eca50bc396cd1464f61e3fe0bb6506d45c90c53 | 173002900 |
| 4 | 0x000002ccee593d698e68923156b131d44518cb7cb38b... | 0x3a71e3b4a01221e66007d7b1d3f8566b788edac6 | 0x502a76d02dfaeb9a7907a4e4b28fb66519ba7d60 | 1498500000 |
chord = hv.Chord(df_transfers.iloc[:, 1:])
chord.opts(
opts.Chord(cmap='Set1', edge_cmap='Set1', edge_color='from', labels='from', node_color='from', width=600, height=600))
The GraphQL query language presents high flexibility regarding query constructs: for instance, I can query a subset of the available features of transfers filtering by a specific source address.
query_filtered = """query {
transfers(where: {from: "0xfdb16996831753d5331ff813c29a93c76834a0ad"}) {
from
to
value
}
}"""
r = requests.post(url, json={'query': query_filtered})
print(r.status_code)
#print(r.text)
json_data = json.loads(r.text)
df_transfers_filtered = pd.DataFrame(json_data["data"]["transfers"])
200
df_transfers_filtered
| from | to | value | |
|---|---|---|---|
| 0 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0xae844ed72c0f0bb362c39a98b00ac7cefbb712bc | 1415430594 |
| 1 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x70865473b490d87e9080820b0a9c28509b812d43 | 274862068 |
| 2 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x8b6eb0e9c4bcb5e7377856625bb881015731f3c6 | 109349000000 |
| 3 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x29aa34841ff82914f63e6c3e5fad22d2855fd1d1 | 129000000 |
| 4 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x74e821fa3af6fdd075490832ce1742b117f5d2d1 | 690000000 |
| ... | ... | ... | ... |
| 95 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0xfb3da7509082b1dcb5d1447fc51acca241e7159f | 130000000 |
| 96 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x957ff9cc9e514fb8c572835288cd8736841efb1d | 306001363 |
| 97 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0xb0477a0c68a548e8d4a0017b5bd8959218d0d94a | 14683287812 |
| 98 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x1be889576c1a4effd0acc032c1a6971438ce966b | 14044903954 |
| 99 | 0xfdb16996831753d5331ff813c29a93c76834a0ad | 0x4ba4adc2d04935236c604a5b9eb804577f2ebc49 | 202000000 |
100 rows × 3 columns